Prepare the Workspace
# Notebook chunk options: suppress warnings/messages in knitted output.
knitr::opts_chunk$set(warning=FALSE, message=FALSE, error = FALSE) # rmd options
# NOTE(review): clearing the global environment is a notebook convenience;
# avoid rm(list = ls()) in scripts that are source()d by others.
rm(list = ls()); invisible(gc()) # cleaning
Control Block
# f, f, 0.25 looks nice.  (settings combination noted by the original author)
# Analysis switches read by the masking/filtering chunks below.
# Use TRUE/FALSE, not T/F: T and F are ordinary variables and can be reassigned.
allowmigrants <- FALSE # OPTIONS: TRUE, FALSE — keep migratory pairs?
allowsympatry <- FALSE # OPTIONS: TRUE, FALSE — keep sympatric (zero-barrier) pairs?
minoverperc <- 0 # remove pairs that do not have thermal overlap (anagenesis)
costvar <- "ele" # barrier-cost variable prefix used to build column names (elevation)
Packages & Prefs
# Global prefs: heavily penalize scientific notation in printed output.
options(scipen = 999)
# Readable negation of %in%: TRUE where x is absent from the table.
`%notin%` <- function(x, table) !(x %in% table)
# Package loads.
# NOTE(review): require() returns FALSE instead of erroring when a package is
# missing; library() would fail fast. Kept as-is to avoid changing behavior.
# NOTE(review): rgeos and rgdal are retired from CRAN — presumably pinned in
# this local environment; verify before re-running elsewhere.
require(ggplot2) # load packages
require(GGally)
require(viridis)
require(caper)
library(dplyr)
library(stringr)
library(maps)
library(ape)
library(EnvStats)
library(forecast)
require(nlme)
require(geodist)
require(letsR)
require(spdep)
require(spatialreg)
require(rnaturalearth)
require(rnaturalearthdata)
require(rgeos)
require(sf)
require(rgdal)
require(raster)
# Medium-resolution coastline (sf) used as the basemap for all world plots below.
world <- ne_coastline(scale = "medium", returnclass = "sf")
# Shifted log transform: offsets x by abs(min(x)) + 1 before logging, so
# vectors containing zeros/negatives can be log-transformed (min maps to log(1) = 0
# when min(x) <= 0). NA values are ignored when locating the minimum.
# NOTE(review): when min(x) > 0 the offset still adds abs(min(x)) + 1; kept
# as-is to preserve the original transform used throughout the models below.
vlog <- function(x){
  log( x + abs(min( x , na.rm = TRUE)) + 1) # TRUE, not T (T is reassignable)
}
# Data live on Box. NOTE(review): knitr resets the working directory after each
# chunk (see notice below); use the knitr root.dir option for a persistent cwd.
setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Analyze_Processed_Cluster_Outputs/Data")
The working directory was changed to /Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Analyze_Processed_Cluster_Outputs/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
# Per-pair exclusion/annotation table (realms, islands, cosmopolitan, new/old world).
exclusion <- read.csv(file = "exclusion_nonsimpatric_nonmigrant.csv")
exclusion$realm1red[is.na(exclusion$realm1red)] <- "NA" # NA is north america not R's NA value. fix.
exclusion$realm2green[is.na(exclusion$realm2green)] <- "NA" # same fix for the second species' realm
Load Main Data
# main dataframe ---------------------------------
setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Process_Cluster_Outputs/Data")
load(file = "Pair_Barrier_Data_FEB2021.rdata") # loads object `mypairdata`
mydata <- mypairdata; rm(mypairdata) # rename to the working name
rownames(mydata) <- mydata$Species.1 # one row per species pair, keyed by species 1
mydatahold <- mydata # untouched backup of the unfiltered dataset
initial masks
# migration ---
# Drop migratory pairs unless allowed. isFALSE() is explicit and safe, unlike
# comparing against the reassignable shorthand F.
if (isFALSE(allowmigrants)) {
  mydata <- mydata[which(mydata$Migration == 1.0), ] # keep strictly sedentary pairs
}
# patry (sympatry): sympatric pairs have a zero-cost barrier path; drop them unless allowed.
if (isFALSE(allowsympatry)) {
  mydata <- mydata[which(mydata[, paste0(costvar, "_c0")] > 0), ] # doesnt matter if you use ele, mat, vart paths here, will be same answer
}
sort
mydata$uniquePairId == exclusion$mydata.uniquePairId # pre-sort sanity check: rows are NOT yet aligned (all FALSE below)
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[67] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[100] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[166] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[199] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[232] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[265] FALSE FALSE FALSE FALSE
x <- mydata[which(mydata$Species.1 == "Apteryx_owenii"),] # spot-check one pair before the reorder
mydata <- mydata[order(match(mydata$uniquePairId,exclusion$mydata.uniquePairId)), ] # align mydata row order to the exclusion table
y <- mydata[which(mydata$Species.1 == "Apteryx_owenii"),] # same pair after the reorder
# sum(y!=x, na.rm = T) # checks.
# head(mydata)
mydata$uniquePairId == exclusion$mydata.uniquePairId # post-sort check: should now be all TRUE
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[41] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[81] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[161] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[201] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
rm(x,y) # discard the spot-check rows
# # Basic range maps for all pairs (no paths)
# wdPAM <- "/Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/PREP/PAM/Data"
# setwd(wdPAM); load("cbPAM.rdata")
# setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Process_Cluster_Outputs/Data")
# pdf("pairmaps2.pdf", width = 19, height = 9.25)
# for (i in 1:nrow(mydata)){
# x <- cbPAM[,c("Longitude(x)","Latitude(y)",mydata$Species.1bl[i])]
# x <- as.data.frame(x[x[,3] == 1,])
# if(ncol(x) == 1) {
# x <- t(x)
# colnames(x) <- c("lon", "lat", "pres")
# x <- as.data.frame(x)
# } else {
# colnames(x) <- c("lon", "lat", "pres")
# }
#
# y <- cbPAM[,c("Longitude(x)","Latitude(y)",mydata$Species.2bl[i])]
# y <- as.data.frame(y[y[,3] == 1,])
# if(ncol(y) == 1) {
# y <- t(y)
# colnames(y) <- c("lon", "lat", "pres")
# y <- as.data.frame(y)
#
# } else {
# colnames(y) <- c("lon", "lat", "pres")
# }
# z <- ggplot(world)+
# geom_sf() +
# geom_point(data = x, aes(y=lat, x=lon), color = "red") +
# geom_point(data = y, aes(y=lat, x=lon), color = "green") +
# theme_bw() +
# ggtitle(i)
# print(z)
# }
# print(i)
# dev.off()
# Column-bind is safe ONLY because rows were aligned by uniquePairId above.
mydata_exclusion <- cbind(mydata, exclusion)
save(mydata_exclusion, file = "mydata_exclusion.rdata") # snapshot of the merged table
mydata$realm1 <- exclusion$realm1red # biogeographic realm of species 1
mydata$realm2 <- exclusion$realm2green # biogeographic realm of species 2
mydata$realm <- paste0(mydata$realm1, mydata$realm2) # concatenated (ordered) realm-pair code
table(mydata$realm)
AAAA AAIM ATAT ATIM ATPA IMAA IMAT IMIM IMNT NANA NTAA NTNA NTNT OCOC PAAT PANA PAPA
30 1 45 3 1 3 3 27 1 4 1 1 135 2 3 2 6
# Realm-pair codes are order-dependent (AAIM vs IMAA); fold each reversed code
# into one canonical form. The bare expression before each ; prints the rows
# already holding the canonical code, as a manual check.
mydata$realm[mydata$realm == "AAIM"]; mydata$realm[mydata$realm == "IMAA"] <- "AAIM"
[1] "AAIM"
mydata$realm[mydata$realm == "ATIM"]; mydata$realm[mydata$realm == "IMAT"] <- "ATIM"
[1] "ATIM" "ATIM" "ATIM"
mydata$realm[mydata$realm == "ATPA"]; mydata$realm[mydata$realm == "PAAT"] <- "ATPA"
[1] "ATPA"
mydata$realm[mydata$realm == "IMNT"]; mydata$realm[mydata$realm == "NTIM"] <- "IMNT"
[1] "IMNT"
mydata$realm[mydata$realm == "NTAA"]; mydata$realm[mydata$realm == "AANT"] <- "NTAA"
[1] "NTAA"
mydata$realm[mydata$realm == "NTNA"]; mydata$realm[mydata$realm == "NANT"] <- "NTNA"
[1] "NTNA"
mydata$realm[mydata$realm == "PANA"]; mydata$realm[mydata$realm == "NAPA"] <- "PANA"
[1] "PANA" "PANA"
mydata$realm <- as.factor(mydata$realm); mydata$realm <- relevel(mydata$realm, "NTNT") # NTNT (most common, n = 135) as reference level
mydata$landgap <- as.logical(exclusion$island) # water gap between ranges?
mydata$cosmopolitan <- as.logical(exclusion$cosmopolitan)
mydata$new.old <- as.logical(exclusion$new.old) # pair spans new & old world
rm(exclusion, mydata_exclusion) # tidy up
Impose masks & do calculations
# filter cosmopolitan and new/old world species (there are relatively few after imposing previous masks.)
mydata <- mydata[which(mydata$cosmopolitan == FALSE & mydata$new.old == FALSE),]
# dependent variable: elevational barrier size ---
mydata$cost <- mydata[, paste0(costvar, "_c25")] # e.g. "ele_c25" — presumably the c25-path barrier cost; confirm suffix meaning upstream
# data filtering -----------------------
# thermal overlap ---
mydata$MAT_overlap <- mydata[,paste0("MAT", "_ov_perc_smrnge")] # % thermal-niche overlap (smaller range)
mydata <- mydata[mydata$MAT_overlap > minoverperc,] # keep pairs above the overlap threshold
# update sort order --------------------
# seq_len() is the safe idiom; the original seq(1:nrow(mydata)) only worked
# via seq()'s seq_along fallback and is fragile.
mydata$sortorder <- seq_len(nrow(mydata))
# longitude ---------------------------- (mean longitude of the pair's barrier path)
mydata$lon <- mydata[,paste0("lon_mean_pair_", costvar, "_c25")]
# latitude ----------------------------
mydata$lat <- mydata[,paste0("lat_mean_pair_", costvar, "_c25")]
# temperature breadth -----------------
mydata$tas_breadth <- mydata$tas_range # mean(mean(sp1 annual tas range -- one value per cell), mean(sp2 annual tas range -- one value per cell))
# mean annual temperature --------------
mydata$tas_position <- mydata$tas_mean # mean(mean(sp1 annual tas mean -- one value per cell), mean(sp2 annual tas mean -- one value per cell))
# precipitation breadth ---------------
mydata$pcp_breadth <- mydata$pcp_range # mean(mean(sp1 annual pcp range -- one value per cell), mean(sp2 annual pcp range -- one value per cell))
# precipitation position --------------
mydata$pcp_position <- mydata$pcp_mean # mean(mean(sp1 annual pcp mean -- one value per cell), mean(sp2 annual pcp mean -- one value per cell))
# distance -----------------------------
mydata$distance <- mydata[,paste0("centroid_distance_",costvar,"_c25")] # between-range centroid distance
# mountain mass ------------------------
# mountain mass ------------------------
# GMBA global mountain-range polygons; rasterized onto the bird PAM grid below.
mtns <- readOGR(dsn="~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Other_Input_Data/GMBA", layer="GMBA Mountain Inventory_v1.2-World", verbose = FALSE)
wdPAM <- "/Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/PREP/PAM/Data"
setwd(wdPAM); load("LonLat_BirdPAM_raster.rdata") # loads `LonLat_BirdPAM_raster`
The working directory was changed to /Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/PREP/PAM/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
mtns <- rasterize(mtns, LonLat_BirdPAM_raster) # mountain polygons -> PAM-aligned raster
mtns@data@values[!is.na(mtns@data@values)] <- 1 # replace mountain IDs with simple coding. 1 for mountain...
mtns@data@values[is.na(mtns@data@values)] <- 0 # ...0 for no mountain
# Mountain mass: fraction of raster cells within a fixed buffer of each pair's
# barrier midpoint that are mountainous. The buffer radius (~8 degrees of
# latitude in metres) is loop-invariant, so compute it once instead of once
# per iteration as the original did.
buff <- raster::pointDistance(c(0, 0), c(0, 8), lonlat = TRUE)
mydata$mtn_mass <- NA
for (i in seq_len(nrow(mydata))) {
  coords1 <- data.frame(lon = mydata$lon[i], lat = mydata$lat[i])
  coordinates(coords1) <- c("lon", "lat")
  crs(coords1) <- crs(LonLat_BirdPAM_raster) # get coordinates in the raster's CRS
  z <- extract(mtns, coords1, buffer = buff)
  mydata$mtn_mass[i] <- sum(z[[1]]) / length(z[[1]]) # proportion of buffered cells that are mountain
}
load phylo and prune
# Switch to the phylogeny directory (chunk-local; see knitr notice below).
setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Other_Input_Data/BirdTrees")
The working directory was changed to /Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Other_Input_Data/BirdTrees inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
load(file = "BirdTrees.Rdata") # presumably loads `tree` used by corPagel() below — confirm object name
Save frame for CB
Neighbors for spatial analysis.
# Pairwise geodesic distance matrix between barrier midpoints (metres).
distm <- mydata[,c("lon", "lat")]
distm <- geodist(distm, measure = "geodesic")
rownames(distm) <- rownames(mydata)
colnames(distm) <- rownames(mydata)
basecols <- ncol(mydata) # column count BEFORE spatial-filter eigenvectors are appended
# neighbors
coords<-cbind(mydata$lon, mydata$lat); coords<-as.matrix(coords) ; row.names(coords)<-rownames(mydata)
k1 <- knn2nb(knearneigh(coords, longlat = T)) # first-nearest-neighbour graph
knearneigh: identical points found
nb<- dnearneigh(coords,row.names = row.names(coords), d1=0,d2=max(unlist(nbdists(k1, coords, longlat = T))),longlat=T) # distance-band neighbours out to the max 1-NN distance, so no point is neighbourless
plots for transformations
# Quick histograms of every candidate model variable to eyeball distributions
# and transformation needs (50 bins, titled by variable name).
for (v in c("cost", "lat", "lon", "tas_breadth", "tas_position",
            "pcp_breadth", "pcp_position", "mtn_mass", "dispersal_ability",
            "pair_age", "distance", "boundary_length", "MAT_overlap")) {
  hist(mydata[[v]], breaks = 50, main = v)
}
world plots
# World maps of each candidate variable; points are drawn low-to-high so large
# values overplot on top. "cost" is vlog-transformed; realms plot as discrete.
for (i in c("cost", "lat", "lon","tas_breadth","tas_position","pcp_breadth","pcp_position", "mtn_mass", "dispersal_ability", "pair_age", "distance", "boundary_length", "MAT_overlap", "realm1", "realm2", "landgap")) {
  if(i %in% c("cost")){
    # BUGFIX: order the data first, THEN transform the ordered column. The
    # original computed `myc` from the unordered data while `data =` was the
    # reordered frame, so colors were attached to the wrong points.
    ord <- mydata[order(mydata[, i], decreasing = F),]
    myc <- vlog(ord[, i])
    x <- ggplot(world)+
      geom_sf() +
      geom_point(data = ord, aes(y=lat, x=lon, color = myc), alpha = 0.75) +
      ggtitle(i)+
      scale_color_viridis()
    print(x)
  } else if (i %in% c("realm1", "realm2")){
    x <- ggplot(world)+
      geom_sf() +
      geom_point(data = mydata[order(mydata[, "cost"], decreasing = F),], aes(y=lat, x=lon, color = get(i)), alpha = 0.75) +
      ggtitle(paste0("ln ", i))+ # NOTE(review): title says "ln" but realms are untransformed — confirm intent
      scale_color_viridis(discrete = T)
    print(x)
  } else {
    x <- ggplot(world)+
      geom_sf() +
      geom_point(data = mydata[order(mydata[, "cost"], decreasing = F),], aes(y=lat, x=lon, color = get(i)), alpha = 0.75) +
      ggtitle(i)+
      scale_color_viridis()
    print(x)
  }
}
mydata <- mydata[, 1:basecols] # drop any previously appended spatial-filter columns
# no spatial filtering ---------------------------------------------------------
# PGLS: scaled vlog barrier cost ~ scaled vlog thermal breadth, with Pagel's
# lambda phylogenetic correlation (lambda estimated, started at 0.99).
m <- gls(scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth))), correlation = corPagel(0.99, phy = tree, fixed = F, form = ~Species.1), data = mydata, method = "REML");summary(m)
Generalized least squares fit by REML
Model: scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth)))
Data: mydata
Correlation Structure: corPagel
Formula: ~Species.1
Parameter estimate(s):
lambda
0.02407895
Coefficients:
Correlation:
(Intr)
scale(I(vlog(tas_breadth))) -0.013
Standardized residuals:
Min Q1 Med Q3 Max
-2.69517748 -0.72862730 0.02946462 0.64630925 2.64543853
Residual standard error: 0.9898138
Degrees of freedom: 235 total; 233 residual
# Fit quality: correlation between observed (transformed) cost and predictions.
print(paste("Correlation between data and prediction: ", cor(predict(m),scale(I(vlog(mydata$cost))))))
[1] "Correlation between data and prediction: 0.17758346179705"
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0,1)) # observed vs fitted for the cost response
qqnorm(m)
# plot ---------------------------------
plot(scale(I(vlog(mydata$tas_breadth))), scale(I(vlog(mydata$cost))), pch = 16,
xlab = "thermal niche breadth", ylab = paste0("log ", costvar, " cost" ), main = "Global model")
myx <- seq(min(scale(vlog(mydata$tas_breadth))), max(scale(vlog(mydata$tas_breadth))), length.out=100) # plot fit
mycoefs<-coef(m)
myy <- mycoefs[1] + mycoefs[2]*myx # fitted line from the PGLS coefficients
lines(c(0,0), c(-10,100), lty=2)
lines(myx,myy,col = 'red')
# Non-phylogenetic fit for comparison (dashed red line over the same plot).
# BUGFIX: use vlog() for tas_breadth — the original used plain log() here,
# inconsistent with the phylogenetic model above, with the x values plotted
# below (computed from vlog), and with every other parallel section.
m <- gls(scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth))), data = mydata, method = "REML")
myx <- seq(min(scale(vlog(mydata$tas_breadth))), max(scale(vlog(mydata$tas_breadth))), length.out=100) # plot fit
mycoefs<-coef(m)
myy <- mycoefs[1] + mycoefs[2]*myx
lines(c(0,0), c(-10,100), lty=2)
lines(myx,myy,col = 'red', lty=2)
# with spatial filtering -------------------------------------------------------
# Refit the base PGLS, then test residuals for spatial autocorrelation.
m <- gls(scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth))), correlation = corPagel(0.99, phy = tree, fixed = F, form = ~Species.1), data = mydata, method = "REML")
# spatial autocorr ---------------------
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x=matx, y=distm, z=12, equidistant = T, plot = T) # spatial correlogram, 12 distance bins
moran.test(residuals(m), nb2listw(nb))$p.value # tiny p-value: significant spatial autocorrelation -> apply spatial filtering below
[1] 0.00000001756702
# spatial filtering --------------------
rm(sarcol)
object 'sarcol' not found
# Moran-eigenvector spatial filtering; fitted(sarcol) returns the selected
# eigenvectors, appended to mydata as extra columns (V23, V24, ...).
sarcol <- SpatialFiltering(formula = scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth))),
data = mydata,nb=nb, style="W", ExactEV = TRUE)
mydata[,c((basecols+1):(basecols+1+ dim(fitted(sarcol))[2]-1))]<-fitted(sarcol)
colnames(mydata) # inspect the appended eigenvector columns (V23-V31 here, i.e. 9 vectors)
[1] "uniquePairId" "Species.1" "Species.2" "Species.1bl" "Species.2bl" "cost" "lat" "lon" "tas_breadth"
[10] "tas_position" "pcp_breadth" "pcp_position" "mtn_mass" "water_buffering" "dispersal_ability" "pair_age" "distance" "boundary_length"
[19] "MAT_overlap" "realm1" "realm2" "landgap" "V23" "V24" "V25" "V26" "V27"
[28] "V28" "V29" "V30" "V31"
# refit with the spatial-filter eigenvectors as covariates ---
m <- gls(scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth))) + V23 + V24 + V25 + V26 + V27 + V28 + V29 + V30 + V31, correlation = corPagel(0.99, phy = tree,fixed = F, form = ~Species.1), data = mydata, method = "REML"); summary(m); cor(predict(m),scale(I(vlog(mydata$cost))))
Generalized least squares fit by REML
Model: scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth))) + V23 + V24 + V25 + V26 + V27 + V28 + V29 + V30 + V31
Data: mydata
Correlation Structure: corPagel
Formula: ~Species.1
Parameter estimate(s):
lambda
0.1132853
Coefficients:
Correlation:
(Intr) s(I((_ V23 V24 V25 V26 V27 V28 V29 V30
scale(I(vlog(tas_breadth))) -0.030
V23 0.060 -0.016
V24 -0.033 0.002 -0.008
V25 -0.031 0.016 -0.012 0.005
V26 0.015 0.020 -0.003 -0.014 0.004
V27 0.032 0.007 0.004 -0.015 -0.006 0.006
V28 -0.018 -0.020 0.003 0.003 0.001 -0.010 -0.003
V29 0.068 0.000 0.001 0.000 0.002 0.003 -0.017 0.000
V30 -0.038 0.001 -0.012 0.002 0.017 -0.001 -0.018 0.001 0.000
V31 0.031 0.024 -0.007 -0.013 -0.011 0.013 0.061 -0.015 0.001 -0.014
Standardized residuals:
Min Q1 Med Q3 Max
-3.0866866 -0.5483974 0.1080253 0.6419830 2.6354891
Residual standard error: 0.8999846
Degrees of freedom: 235 total; 224 residual
[,1]
[1,] 0.5013446
# Re-check residual spatial autocorrelation after adding the eigenvectors.
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x=matx, y=distm, z=12, equidistant = T, plot = T)
moran.test(residuals(m), nb2listw(nb))$p.value # large p: spatial signal removed by the filter
[1] 0.9215695
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0,1)) # observed vs fitted (cost response)
qqnorm(m)
# for(i in c("V23", "V24", "V25", "V26", "V27", "V28", "V29", "V30", "V31")){
# x <- ggplot(world)+
# geom_sf() +
# geom_point(data = mydata[order(mydata[,i]),], aes(y=lat, x=lon, color = mydata[,i]), alpha = 0.9) +
# scale_color_viridis()
# print(x)
# }
Is thermal niche breadth predicted by latitude plus latitude2?
mydata <- mydata[, 1:basecols] # drop the previous section's eigenvector columns
# PGLS: thermal breadth ~ latitude + latitude^2 (quadratic latitudinal gradient).
m <- gls(scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2)), correlation = corPagel(0.99, phy = tree, fixed = F, form = ~Species.1), data = mydata, method = "REML"); summary(m)
Generalized least squares fit by REML
Model: scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2))
Data: mydata
Correlation Structure: corPagel
Formula: ~Species.1
Parameter estimate(s):
lambda
0.3441942
Coefficients:
Correlation:
(Intr) scl(l)
scale(lat) 0.026
scale(I(lat^2)) -0.078 0.085
Standardized residuals:
Min Q1 Med Q3 Max
-1.8535474 -0.6164704 0.1168708 0.7632055 3.0128361
Residual standard error: 0.7782013
Degrees of freedom: 235 total; 232 residual
# Fit quality for the tas_breadth ~ latitude model.
print(paste("Correlation between data and prediction: ", cor(predict(m),scale(I(vlog(mydata$tas_breadth))))))
[1] "Correlation between data and prediction: 0.700677229106234"
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
# BUGFIX: this model's response is tas_breadth, not cost — the observed-vs-
# fitted plot must use the modeled response (copy-paste from the cost sections).
plot(m, scale(I(vlog(tas_breadth))) ~ fitted(.), abline = c(0,1))
qqnorm(m)
# plot ---------------------------------
# Scatter of thermal breadth against latitude with the quadratic PGLS fit
# (solid red) and, below, the non-phylogenetic fit (dashed red).
plot(scale(mydata$lat), scale(I(vlog(mydata$tas_breadth))), pch = 16,
xlab = "latitude", ylab = "thermal niche breadth", main = "Global model")
myx <- seq(min(scale(mydata$lat)), max(scale(mydata$lat)), length.out=100) # plot fit
mycoefs<-coef(m)
myy <- mycoefs[1] + mycoefs[2]*myx + mycoefs[3]*myx^2 # quadratic prediction
lines(c(0,0), c(-10,100), lty=2)
lines(myx,myy,col = 'red')
m <- gls(scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2)), data = mydata, method = "REML") # non-phylogenetic comparison fit
myx <- seq(min(scale(mydata$lat)), max(scale(mydata$lat)), length.out=100) # plot fit
mycoefs<-coef(m)
myy <- mycoefs[1] + mycoefs[2]*myx + mycoefs[3]*myx^2
lines(c(0,0), c(-10,100), lty=2)
lines(myx,myy,col = 'red', lty=2)
# with spatial filtering -------------------------------------------------------
m <- gls(scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2)), correlation = corPagel(0.99, phy = tree, fixed = F, form = ~Species.1), data = mydata, method = "REML")
# spatial autocorr ---------------------
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x=matx, y=distm, z=12, equidistant = T, plot = T)
moran.test(residuals(m), nb2listw(nb))$p.value # tiny p-value: strong spatial autocorrelation -> filter below
[1] 0.000000000000000000000000000000000000000000000000000000000000000009068726
# spatial filtering --------------------
rm(sarcol)
sarcol <- SpatialFiltering(formula = scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2)),
data = mydata,nb=nb, style="W", ExactEV = TRUE)
mydata[,c((basecols+1):(basecols+1+ dim(fitted(sarcol))[2]-1))]<-fitted(sarcol)
colnames(mydata) # inspect the appended eigenvector columns (V23-V38 here, i.e. 16 vectors)
[1] "uniquePairId" "Species.1" "Species.2" "Species.1bl" "Species.2bl" "cost" "lat" "lon" "tas_breadth"
[10] "tas_position" "pcp_breadth" "pcp_position" "mtn_mass" "water_buffering" "dispersal_ability" "pair_age" "distance" "boundary_length"
[19] "MAT_overlap" "realm1" "realm2" "landgap" "V23" "V24" "V25" "V26" "V27"
[28] "V28" "V29" "V30" "V31" "V32" "V33" "V34" "V35" "V36"
[37] "V37" "V38"
# refit with the spatial-filter eigenvectors as covariates ---
# NOTE(review): the trailing cor() compares predictions of a tas_breadth model
# against vlog(cost) — likely a copy-paste; confirm which correlation is intended.
m <- gls(scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2)) + V23 + V24 + V25 + V26 + V27 + V28 + V29 + V30 + V31 + V32 + V33 + V34 + V35 + V36 + V37 + V38, correlation = corPagel(0.99, phy = tree,fixed = F, form = ~Species.1), data = mydata, method = "REML"); summary(m); cor(predict(m),scale(I(vlog(mydata$cost))))
Generalized least squares fit by REML
Model: scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2)) + V23 + V24 + V25 + V26 + V27 + V28 + V29 + V30 + V31 + V32 + V33 + V34 + V35 + V36 + V37 + V38
Data: mydata
Correlation Structure: corPagel
Formula: ~Species.1
Parameter estimate(s):
lambda
0.5675592
Coefficients:
Correlation:
(Intr) scl(l) s(I(^2 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37
scale(lat) 0.045
scale(I(lat^2)) -0.077 0.055
V23 0.011 -0.056 -0.055
V24 0.070 0.021 0.019 0.048
V25 0.053 0.032 -0.058 0.076 0.005
V26 -0.024 0.005 -0.134 0.208 0.006 0.167
V27 -0.072 -0.037 0.034 0.040 -0.044 -0.007 -0.014
V28 -0.007 -0.004 -0.078 0.050 -0.032 -0.022 0.090 0.028
V29 0.028 -0.005 -0.006 -0.069 -0.003 0.037 -0.018 -0.057 -0.047
V30 0.002 0.011 -0.022 -0.006 0.002 -0.052 0.005 0.011 0.048 -0.004
V31 -0.038 -0.018 -0.018 -0.017 -0.009 -0.013 -0.005 0.030 -0.011 -0.008 -0.002
V32 0.005 -0.030 -0.009 -0.062 -0.030 0.021 -0.006 -0.046 -0.019 0.014 -0.023 -0.004
V33 0.001 -0.005 0.012 0.022 0.004 -0.002 -0.021 0.004 -0.003 -0.002 -0.008 -0.011 -0.037
V34 -0.008 -0.016 -0.070 0.043 -0.012 -0.014 0.026 0.012 0.050 -0.015 0.014 0.003 -0.007 -0.003
V35 -0.021 0.062 0.018 -0.018 0.010 -0.030 -0.030 0.026 0.022 -0.022 0.074 0.012 -0.027 0.010 0.036
V36 -0.005 0.012 -0.007 -0.006 -0.010 -0.013 -0.012 -0.001 -0.007 -0.001 -0.001 0.001 -0.020 0.002 0.003 0.004
V37 -0.045 -0.014 -0.034 0.063 -0.007 -0.005 0.055 0.025 0.041 -0.022 0.021 0.008 -0.073 -0.028 0.013 -0.004 -0.003
V38 0.027 -0.017 -0.021 0.004 0.017 0.013 -0.006 0.010 -0.006 -0.003 -0.015 0.002 0.008 -0.014 -0.005 -0.012 0.001 -0.008
Standardized residuals:
Min Q1 Med Q3 Max
-2.0731629 -0.6096382 -0.1790288 0.2623287 3.2865813
Residual standard error: 0.5443762
Degrees of freedom: 235 total; 216 residual
[,1]
[1,] 0.1123255
# Re-check residual spatial autocorrelation after adding the eigenvectors.
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x=matx, y=distm, z=12, equidistant = T, plot = T)
moran.test(residuals(m), nb2listw(nb))$p.value # large p: spatial signal removed by the filter
[1] 0.9484634
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
# BUGFIX: this model's response is tas_breadth, not cost — the observed-vs-
# fitted plot must use the modeled response (copy-paste from the cost sections).
plot(m, scale(I(vlog(tas_breadth))) ~ fitted(.), abline = c(0,1))
qqnorm(m)
#
# for(i in c("V23", "V24", "V25", "V26", "V27", "V28", "V29", "V30", "V31", "V32", "V33", "V34", "V35", "V36", "V37", "V38")){
# x <- ggplot(world)+
# geom_sf() +
# geom_point(data = mydata[order(mydata[,i]),], aes(y=lat, x=lon, color = mydata[,i]), alpha = 0.9) +
# scale_color_viridis()
# print(x)
# }
Is barrier cost predicted by latitude plus latitude2?
mydata <- mydata[, 1:basecols] # drop the previous section's eigenvector columns
# PGLS: barrier cost ~ latitude + latitude^2 (note: response is cost here, not tas_breadth).
m <- gls(scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2)), correlation = corPagel(0.99, phy = tree, fixed = F, form = ~Species.1), data = mydata, method = "REML"); summary(m)
Generalized least squares fit by REML
Model: scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2))
Data: mydata
Correlation Structure: corPagel
Formula: ~Species.1
Parameter estimate(s):
lambda
-0.01153228
Coefficients:
Correlation:
(Intr) scl(l)
scale(lat) -0.016
scale(I(lat^2)) 0.118 0.086
Standardized residuals:
Min Q1 Med Q3 Max
-3.09043113 -0.70872513 0.06485171 0.61849430 2.67235129
Residual standard error: 0.9785129
Degrees of freedom: 235 total; 232 residual
# Fit quality and residual diagnostics for the cost ~ latitude model.
print(paste("Correlation between data and prediction: ", cor(predict(m),scale(I(vlog(mydata$cost))))))
[1] "Correlation between data and prediction: 0.218100113560087"
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0,1)) # observed vs fitted (cost response)
qqnorm(m)
# plot ---------------------------------
# Scatter of barrier cost against latitude with the quadratic PGLS fit
# (solid red) and the non-phylogenetic fit (dashed red).
# BUGFIX: the y-axis is vlog(cost), but the label said "thermal niche breadth"
# (copy-paste from the previous section); label the barrier cost instead.
plot(scale(mydata$lat), scale(I(vlog(mydata$cost))), pch = 16,
xlab = "latitude", ylab = paste0("log ", costvar, " cost"), main = "Global model")
myx <- seq(min(scale(mydata$lat)), max(scale(mydata$lat)), length.out=100) # plot fit
mycoefs<-coef(m)
myy <- mycoefs[1] + mycoefs[2]*myx + mycoefs[3]*myx^2 # quadratic prediction
lines(c(0,0), c(-10,100), lty=2)
lines(myx,myy,col = 'red')
m <- gls(scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2)), data = mydata, method = "REML") # non-phylogenetic comparison fit
myx <- seq(min(scale(mydata$lat)), max(scale(mydata$lat)), length.out=100) # plot fit
mycoefs<-coef(m)
myy <- mycoefs[1] + mycoefs[2]*myx + mycoefs[3]*myx^2
lines(c(0,0), c(-10,100), lty=2)
lines(myx,myy,col = 'red', lty=2)
# with spatial filtering -------------------------------------------------------
m <- gls(scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2)), correlation = corPagel(0.99, phy = tree, fixed = F, form = ~Species.1), data = mydata, method = "REML")
# spatial autocorr ---------------------
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x=matx, y=distm, z=12, equidistant = T, plot = T)
moran.test(residuals(m), nb2listw(nb))$p.value # small p-value: significant spatial autocorrelation -> filter below
[1] 0.0001422316
# spatial filtering --------------------
rm(sarcol)
sarcol <- SpatialFiltering(formula = scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2)),
data = mydata,nb=nb, style="W", ExactEV = TRUE)
mydata[,c((basecols+1):(basecols+1+ dim(fitted(sarcol))[2]-1))]<-fitted(sarcol)
colnames(mydata) # inspect the appended eigenvector columns (V23-V26 here, i.e. 4 vectors)
[1] "uniquePairId" "Species.1" "Species.2" "Species.1bl" "Species.2bl" "cost" "lat" "lon" "tas_breadth" "tas_position" "pcp_breadth" "pcp_position" "mtn_mass"
[14] "water_buffering" "dispersal_ability" "pair_age" "distance" "boundary_length" "MAT_overlap" "realm1" "realm2" "landgap" "V23" "V24" "V25" "V26"
# refit with the 4 spatial-filter eigenvectors as covariates ---
m <- gls(scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2)) + V23 + V24 + V25 + V26, correlation = corPagel(0.99, phy = tree,fixed = F, form = ~Species.1), data = mydata, method = "REML"); summary(m); cor(predict(m),scale(I(vlog(mydata$cost))))
Generalized least squares fit by REML
Model: scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2)) + V23 + V24 + V25 + V26
Data: mydata
Correlation Structure: corPagel
Formula: ~Species.1
Parameter estimate(s):
lambda
0.1048059
Coefficients:
Correlation:
(Intr) scl(l) s(I(^2 V23 V24 V25
scale(lat) 0.001
scale(I(lat^2)) -0.064 0.104
V23 -0.070 -0.017 0.013
V24 -0.041 -0.007 -0.006 0.011
V25 -0.031 0.013 0.018 0.009 0.001
V26 0.032 0.005 -0.013 -0.021 -0.008 -0.004
Standardized residuals:
Min Q1 Med Q3 Max
-3.2135868 -0.5833627 0.1685737 0.6876315 2.6281318
Residual standard error: 0.9181122
Degrees of freedom: 235 total; 228 residual
[,1]
[1,] 0.4518531
# Re-check residual spatial autocorrelation after adding the eigenvectors.
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x=matx, y=distm, z=12, equidistant = T, plot = T)
moran.test(residuals(m), nb2listw(nb))$p.value # large p: spatial signal removed by the filter
[1] 0.7738208
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0,1)) # observed vs fitted (cost response)
qqnorm(m)
# Map each selected spatial-filter eigenvector.
# BUGFIX: the color vector must come from the *reordered* rows; the original
# ordered `data =` by column i while `color = mydata[,i]` kept the original
# row order, so colors were attached to the wrong points.
for(i in c("V23", "V24", "V25", "V26")){
  ord <- mydata[order(mydata[,i]),]
  x <- ggplot(world)+
    geom_sf() +
    geom_point(data = ord, aes(y=lat, x=lon, color = ord[,i]), alpha = 0.9) +
    scale_color_viridis()
  print(x)
}
# Fit quality of the spatially filtered cost ~ latitude model.
print(paste("Correlation between data and prediction: ", cor(predict(m),scale(I(vlog(mydata$cost))))))
[1] "Correlation between data and prediction: 0.694878434079338"
Sensitivity Analyses
# 1 Pair age (all v. < 8mya (end of uplift of Andes))
# 2 Distance (all v. < 1500*1000) (1500 / 110 = ~ 22 degrees)
# 3 MAT_overlap (> 0% v. > 75% (more restrictive == more conservative for this measure.))
# 4 landgap (all v. nogap) *ALL GAPS ARE < 110km (two water grid cells marked as land for having >50% land @ 0.5 degree resolution.)